library(tidyverse)
library(lubridate)
library(hms)

# Load the wrangled tweet data; it already carries the language columns
# (lang, indo_conf, malay_conf) that the filtering step below relies on.
dt <- readRDS(file = "/home/ubuntu/data/shared_folder/Data/20211111_wrangled.rds")

# Convert the tweet and account-creation timestamps to the Jakarta time zone,
# extract the time of day of each tweet, and add an engagement column.
#
# engagement = retweets + replies + quotes + likes, where the retweet count is
# multiplied by (1 - is_retweet), i.e. dropped for rows flagged as retweets
# (assumes is_retweet is logical or 0/1 -- TODO confirm).
#
# The sum is written as vectorised column arithmetic instead of
# rowwise() + sum(): the per-row values are the same (an NA in any of the
# counts still yields NA), but the expression is evaluated once over whole
# columns rather than once per row.
dt <- dt %>%
  mutate(
    jkt_date_time = with_tz(tweet_created_at, tzone = "Asia/Jakarta"),
    times = as_hms(jkt_date_time),
    jkt_author_created = with_tz(author_created, tzone = "Asia/Jakarta"),
    engagement = (1 - is_retweet) * tweet_retweet_count +
      tweet_reply_count +
      tweet_quote_count +
      tweet_like_count
  ) %>%
  # the former rowwise() %>% ungroup() round trip always returned a plain,
  # ungrouped tibble; keep that guarantee for the steps that follow
  ungroup() %>%
  as_tibble()

# Language filter: keep a tweet when at least one of these holds
#   - Twitter tagged it as Indonesian (lang == "in"),
#   - polyglot gave a non-zero Indonesian or Malay confidence,
#   - the tweet is flagged as having no text.
# filter() drops rows for which the combined condition evaluates to NA.
# NOTE(review): no_text is compared against the string "TRUE" -- confirm the
# column's type.
indo_dt <- filter(
  dt,
  lang == "in" |
    indo_conf > 0 |
    malay_conf > 0 |
    no_text == "TRUE"
)

# Add queries_total: the number of search queries each tweet matched, obtained
# by adding up the twelve per-query columns (presumably 0/1 or logical flags --
# TODO confirm).
#
# Vectorised `+` replaces rowwise() + sum(): same values and NA behaviour (an
# NA in any query column still gives NA), computed over whole columns instead
# of row by row. indo_dt is already an ungrouped tibble at this point, so no
# ungroup() is required afterwards.
indo_dt <- indo_dt %>%
  mutate(
    queries_total = otsus +
      papuanlivesmatter +
      rasisme +
      ulmwp_wenda +
      nduga +
      koman +
      bin_nugraha +
      nkri +
      teroris_kkb +
      UNassembly +
      FaktadiPapua +
      zanambani
  )

## Cache indo_dt on disk as an interim file so it does not have to be
## recreated every time R is restarted.
saveRDS(
  object = indo_dt,
  file = "/home/ubuntu/data/shared_folder/Data/20220729_indo_dt.rds"
)
